{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "P-AhVYeBWgQ3"
   },
   "outputs": [],
   "source": [
    "# NOTE: PLEASE MAKE SURE YOU ARE RUNNING THIS IN A PYTHON3 ENVIRONMENT\n",
    "\n",
    "import tensorflow as tf\n",
    "print(tf.__version__)\n",
    "\n",
    "# This is needed for the iterator over the data\n",
    "# But not necessary if you have TF 2.0 installed\n",
    "#!pip install tensorflow==2.0.0-beta0\n",
    "\n",
    "\n",
    "# tf.enable_eager_execution()\n",
    "\n",
    "# !pip install -q tensorflow-datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "_IoM4VFxWpMR"
   },
   "outputs": [],
   "source": [
    "import tensorflow_datasets as tfds\n",
    "imdb, info = tfds.load(\"imdb_reviews\", with_info=True, as_supervised=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "wHQ2Ko0zl7M4"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# train_data, test_data = imdb['train'], imdb['test']\n",
    "train_data, test_data = imdb['train'].take(4000), imdb['test'].take(1000)\n",
    "\n",
    "training_sentences = []\n",
    "training_labels = []\n",
    "\n",
    "testing_sentences = []\n",
    "testing_labels = []\n",
    "\n",
    "# str(s.tonumpy()) is needed in Python3 instead of just s.numpy()\n",
    "for s,l in train_data:\n",
    "  training_sentences.append(str(s.numpy()))\n",
    "  training_labels.append(l.numpy())\n",
    "  \n",
    "for s,l in test_data:\n",
    "  testing_sentences.append(str(s.numpy()))\n",
    "  testing_labels.append(l.numpy())\n",
    "  \n",
    "training_labels_final = np.array(training_labels)\n",
    "testing_labels_final = np.array(testing_labels)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "7n15yyMdmoH1"
   },
   "outputs": [],
   "source": [
    "vocab_size = 10000\n",
    "embedding_dim = 16\n",
    "max_length = 120\n",
    "trunc_type='post'\n",
    "oov_tok = \"<OOV>\"\n",
    "\n",
    "\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "\n",
    "tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)\n",
    "tokenizer.fit_on_texts(training_sentences)\n",
    "word_index = tokenizer.word_index\n",
    "sequences = tokenizer.texts_to_sequences(training_sentences)\n",
    "padded = pad_sequences(sequences,maxlen=max_length, truncating=trunc_type)\n",
    "\n",
    "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n",
    "testing_padded = pad_sequences(testing_sequences,maxlen=max_length)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "9axf0uIXVMhO"
   },
   "outputs": [],
   "source": [
    "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])\n",
    "\n",
    "def decode_review(text):\n",
    "    return ' '.join([reverse_word_index.get(i, '?') for i in text])\n",
    "\n",
    "print(decode_review(padded[1]))\n",
    "print(training_sentences[1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "5NEpdhb8AxID"
   },
   "outputs": [],
   "source": [
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n",
    "    tf.keras.layers.Bidirectional(tf.keras.layers.GRU(32)),\n",
    "    tf.keras.layers.Dense(6, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "])\n",
    "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n",
    "model.summary()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "V5LLrXC-uNX6"
   },
   "outputs": [],
   "source": [
    "num_epochs = 50\n",
    "history = model.fit(padded, training_labels_final, epochs=num_epochs, validation_data=(testing_padded, testing_labels_final))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "nHGYuU4jPYaj"
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "\n",
    "def plot_graphs(history, string):\n",
    "  plt.plot(history.history[string])\n",
    "  plt.plot(history.history['val_'+string])\n",
    "  plt.xlabel(\"Epochs\")\n",
    "  plt.ylabel(string)\n",
    "  plt.legend([string, 'val_'+string])\n",
    "  plt.show()\n",
    "\n",
    "plot_graphs(history, 'accuracy')\n",
    "plot_graphs(history, 'loss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "wSualgGPPK0S"
   },
   "outputs": [],
   "source": [
    "# Model Definition with LSTM\n",
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n",
    "    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n",
    "    tf.keras.layers.Dense(6, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "])\n",
    "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n",
    "model.summary()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "K_Jc7cY3Qxke"
   },
   "outputs": [],
   "source": [
    "# Model Definition with Conv1D\n",
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n",
    "    tf.keras.layers.Conv1D(128, 5, activation='relu'),\n",
    "    tf.keras.layers.GlobalAveragePooling1D(),\n",
    "    tf.keras.layers.Dense(6, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "])\n",
    "model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])\n",
    "model.summary()\n"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [],
   "name": "Course 3 - Week 3 - Lesson 2d.ipynb",
   "provenance": [],
   "version": "0.3.2"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
